import sys, os
import numpy as np
import pandas as pd
import qlib
from pathlib import Path
# Location of the helper scripts inside the local qlib checkout.
scripts_dir = Path("/data/students/huzb/qlib/scripts")
# Show which get_data.py would be used.
print(scripts_dir.joinpath("get_data.py"))
# NOTE: there is deliberately no hard `assert` on the file's existence here.
# The following cell checks for a missing get_data.py and downloads it; an
# assertion at this point would abort before that fallback could ever run.
/data/students/huzb/qlib/scripts/get_data.py
# Fallback: when the local checkout has no get_data.py, fetch it from the
# upstream qlib repository into a scratch directory under the user's home.
if not scripts_dir.joinpath("get_data.py").exists():
    # download get_data.py script
    scripts_dir = Path("~/tmp/qlib_code/scripts").expanduser().resolve()
    scripts_dir.mkdir(parents=True, exist_ok=True)
    import requests

    # A timeout keeps the cell from hanging forever on a dead connection.
    with requests.get(
        "https://raw.githubusercontent.com/microsoft/qlib/main/scripts/get_data.py",
        timeout=60,
    ) as resp:
        # Fail loudly on 4xx/5xx instead of saving an error page as get_data.py.
        resp.raise_for_status()
        with open(scripts_dir.joinpath("get_data.py"), "wb") as fp:
            fp.write(resp.content)
from qlib.constant import REG_CN
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict
在项目根目录运行 `python setup.py build_ext --inplace` 解决。
# Directory holding the prepared qlib dataset (the `target_dir` that
# get_data.py would write to).
provider_uri = "/data/students/huzb/qlib/qlib_data/cn_data"

# Automatic download is disabled; uncomment to fetch the data when it is missing.
# if not exists_qlib_data(provider_uri):
#     print(f"Qlib data is not found in {provider_uri}")
#     sys.path.append(str(scripts_dir))
#     from get_data import GetData
#     GetData().qlib_data(target_dir=provider_uri, region=REG_CN)

# Point qlib at the local data using the CN region settings.
qlib.init(provider_uri=provider_uri, region=REG_CN)
[18754:MainThread](2022-10-31 14:07:45,351) INFO - qlib.Initialization - [config.py:413] - default_conf: client.
[18754:MainThread](2022-10-31 14:07:45,357) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[18754:MainThread](2022-10-31 14:07:45,358) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/data/students/huzb/qlib/qlib_data/cn_data')}
# Stock universe used for the handler, and the index the backtest is compared to.
market, benchmark = "csi300", "SH000300"
from qlib.data import D
from qlib.data.filter import ExpressionDFilter
from qlib.data.filter import NameDFilter
# from dateutil.relativedelta import relativedelta
# Load daily close prices for the universe only to discover which dates are
# actually available; the resulting bounds drive the handler and segments below.
instruments = D.instruments(market=market)  # reuse the configured universe
fields = ['$close']
f_d = D.features(instruments, fields, start_time='2008-01-01', end_time='2020-08-01', freq='day')
df = f_d

# 8th-from-last trading date. Take it from the distinct, sorted dates rather
# than from the 8th-from-last *row*: the frame holds one row per
# (instrument, date), so a positional slice only lands on the right date if
# whichever instrument happens to be listed last traded on each of the final
# eight days.
trade_dates = df.index.get_level_values('datetime').unique().sort_values()
w = trade_dates[-8:-7]

# Collapse the index to the datetime level (note: `df` and `f_d` are the same object).
df.index = df.index.get_level_values('datetime')
print(df.index.min(), df.index.max())

start_time = pd.to_datetime(df.index.min())
end_time = pd.to_datetime(df.index.max())
week_time = pd.to_datetime(w.max())
print(start_time.strftime('%Y-%m-%d'), end_time.strftime('%Y-%m-%d'), week_time.strftime('%Y-%m-%d'))
2008-01-02 00:00:00 2020-07-31 00:00:00 2008-01-02 2020-07-31 2020-07-22
experiment_name = "online_srv"

###################################
# train model
###################################
# Handler settings. The handler spans the full discovered date range; the fit
# window ends on the same day as the "train" segment defined further down.
data_handler_config = {
    "start_time": start_time,
    "end_time": end_time,
    "fit_start_time": start_time,
    "fit_end_time": "2014-12-31",
    "instruments": market,
    "infer_processors": [
        {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": True}},
        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
    ],
    "learn_processors": [
        {"class": "DropnaLabel"},
        {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},
    ],
    # NOTE(review): presumably the return from the next day's close to the close
    # 8 days ahead (negative Ref offsets looking forward) — confirm against qlib docs.
    "label": ["Ref($close, -8) / Ref($close, -1) - 1"],
}

# The task is assembled key by key, in the same order as before: model, then dataset.
task = {}
task["model"] = {
    "class": "TransformerModel",
    "module_path": "qlib.contrib.model.pytorch_transformer",
    "kwargs": {"d_feat": 6, "seed": 0},
}
task["dataset"] = {
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {
        "handler": {
            "class": "Huzb360",
            "module_path": "qlib.contrib.data.handler",
            "kwargs": data_handler_config,
        },
        "segments": {
            "train": (start_time, "2014-12-31"),
            "valid": ("2015-01-01", "2016-12-31"),
            "test": ("2017-01-01", week_time),
        },
    },
}

# model initialization: instantiate the model first, then the dataset
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
[18754:MainThread](2022-10-31 14:07:55,128) INFO - qlib.TransformerModel - [pytorch_transformer.py:65] - Naive Transformer: batch_size : 2048 device : cuda:0 [18754:MainThread](2022-10-31 14:08:26,842) INFO - qlib.timer - [log.py:117] - Time cost: 27.177s | Loading data Done /home/huzb/anaconda3/envs/py38/lib/python3.8/site-packages/numpy/lib/nanfunctions.py:1095: RuntimeWarning: All-NaN slice encountered result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) [18754:MainThread](2022-10-31 14:10:29,093) INFO - qlib.timer - [log.py:117] - Time cost: 120.586s | RobustZScoreNorm Done [18754:MainThread](2022-10-31 14:10:29,989) INFO - qlib.timer - [log.py:117] - Time cost: 0.892s | Fillna Done [18754:MainThread](2022-10-31 14:10:31,274) INFO - qlib.timer - [log.py:117] - Time cost: 0.702s | DropnaLabel Done /data/students/huzb/qlib/qlib/data/dataset/processor.py:352: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df[cols] = t [18754:MainThread](2022-10-31 14:10:32,380) INFO - qlib.timer - [log.py:117] - Time cost: 1.104s | CSRankNorm Done [18754:MainThread](2022-10-31 14:10:32,381) INFO - qlib.timer - [log.py:117] - Time cost: 125.536s | fit & process data Done [18754:MainThread](2022-10-31 14:10:32,382) INFO - qlib.timer - [log.py:117] - Time cost: 152.717s | Init data Done
# start exp to train model
# `experiment_id` is only referenced by the disabled R.start variant, which
# passed both experiment_name=experiment_name and experiment_id=experiment_id;
# the active call below selects the experiment by name alone.
experiment_id = 'cn_backtest'
with R.start(experiment_name=experiment_name):
    # Log the flattened task config, train, and store the fitted model.
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)

    # The active recorder's id is kept so a later experiment can reload the model.
    recorder = R.get_recorder()
    rid = recorder.id

    # prediction
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()
[18754:MainThread](2022-10-31 14:10:37,724) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7fc8d9138fd0> [18754:MainThread](2022-10-31 14:10:37,842) INFO - qlib.workflow - [exp.py:257] - Experiment 1 starts running ... [18754:MainThread](2022-10-31 14:10:38,307) INFO - qlib.workflow - [recorder.py:295] - Recorder 536d024401be432ea8d4bbbeed032861 starts running under Experiment 1 ... [18754:MainThread](2022-10-31 14:10:41,532) INFO - qlib.TransformerModel - [pytorch_transformer.py:191] - training... [18754:MainThread](2022-10-31 14:10:41,534) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch0: [18754:MainThread](2022-10-31 14:10:41,536) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:11:03,077) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:11:14,440) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -1.000089, valid -0.995834 [18754:MainThread](2022-10-31 14:11:14,450) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch1: [18754:MainThread](2022-10-31 14:11:14,451) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:11:35,495) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:11:46,943) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996287, valid -0.994542 [18754:MainThread](2022-10-31 14:11:46,951) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch2: [18754:MainThread](2022-10-31 14:11:46,952) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:12:08,197) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:12:19,707) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996294, valid -0.994722 [18754:MainThread](2022-10-31 14:12:19,710) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch3: [18754:MainThread](2022-10-31 14:12:19,711) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:12:41,094) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:12:52,648) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997152, valid -0.997195 [18754:MainThread](2022-10-31 14:12:52,651) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch4: [18754:MainThread](2022-10-31 14:12:52,653) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:13:14,062) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:13:25,630) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996107, valid -0.994453 [18754:MainThread](2022-10-31 14:13:25,639) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch5: [18754:MainThread](2022-10-31 14:13:25,640) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:13:47,060) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:13:58,621) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.999094, valid -0.997211 [18754:MainThread](2022-10-31 14:13:58,622) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch6: [18754:MainThread](2022-10-31 14:13:58,623) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:14:20,219) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:14:31,987) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995804, valid -0.993554 [18754:MainThread](2022-10-31 14:14:31,996) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch7: [18754:MainThread](2022-10-31 14:14:31,997) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:14:53,442) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:15:05,014) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995852, valid -0.993349 [18754:MainThread](2022-10-31 14:15:05,023) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch8: [18754:MainThread](2022-10-31 14:15:05,024) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:15:26,456) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:15:38,043) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996506, valid -0.994279 [18754:MainThread](2022-10-31 14:15:38,045) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch9: [18754:MainThread](2022-10-31 14:15:38,046) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:15:59,481) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:16:11,057) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997879, valid -0.993217 [18754:MainThread](2022-10-31 14:16:11,066) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch10: [18754:MainThread](2022-10-31 14:16:11,067) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:16:32,499) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:16:44,076) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.997670, valid -0.997404 [18754:MainThread](2022-10-31 14:16:44,079) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch11: [18754:MainThread](2022-10-31 14:16:44,081) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:17:05,502) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:17:17,108) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996070, valid -0.994799 [18754:MainThread](2022-10-31 14:17:17,109) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch12: [18754:MainThread](2022-10-31 14:17:17,111) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:17:38,550) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:17:50,125) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.995956, valid -0.993564 [18754:MainThread](2022-10-31 14:17:50,128) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch13: [18754:MainThread](2022-10-31 14:17:50,129) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:18:11,582) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:18:23,156) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996032, valid -0.993774 [18754:MainThread](2022-10-31 14:18:23,159) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch14: [18754:MainThread](2022-10-31 14:18:23,160) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:18:44,604) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:18:56,190) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.996620, valid -0.993523 [18754:MainThread](2022-10-31 14:18:56,194) INFO - qlib.TransformerModel - [pytorch_transformer.py:213] - early stop [18754:MainThread](2022-10-31 14:18:56,195) INFO - qlib.TransformerModel - [pytorch_transformer.py:216] - best score: -0.993217 @ 9 [18754:MainThread](2022-10-31 14:19:01,443) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 1
'The following are prediction results of the TransformerModel model.'
score
datetime instrument
2017-01-03 SH600000 0.057386
SH600008 0.050349
SH600009 0.097617
SH600010 0.047750
SH600015 0.020047
[18754:MainThread](2022-10-31 14:19:02,768) INFO - qlib.timer - [log.py:117] - Time cost: 0.000s | waiting `async_log` Done
# Notebook display cell: run the trained model on `dataset` and show the
# returned scores; the return value is not stored here.
model.predict(dataset)
datetime instrument
2017-01-03 SH600000 0.057386
SH600008 0.050349
SH600009 0.097617
SH600010 0.047750
SH600015 0.020047
...
2020-07-22 SZ300413 -0.045579
SZ300433 -0.094042
SZ300498 0.041083
SZ300601 0.077929
SZ300628 0.046742
Length: 259200, dtype: float32
# Keep the predictions of the model trained above under their own name:
# `model` and `dataset` are rebound further down, and `week_score` is later
# passed into the strategy kwargs of the backtest config.
week_score = model.predict(dataset)
# Inspection snippets, left disabled:
# dataset.handler._infer.iloc[200].to_string()
# sr.list()
# sr.load('pred.pkl')
# sr.load('label.pkl')
experiment_name = "online_srv"

###################################
# train model
###################################
# Second configuration: same date range, processors and model settings as the
# run above, but with the Alpha360 handler and a label built from Ref(-2)/Ref(-1).
data_handler_config = {
    "start_time": start_time,
    "end_time": end_time,
    "fit_start_time": start_time,
    "fit_end_time": "2014-12-31",
    "instruments": market,
    "infer_processors": [
        {
            "class": "RobustZScoreNorm",
            "kwargs": {"fields_group": "feature", "clip_outlier": True},
        },
        {
            "class": "Fillna",
            "kwargs": {"fields_group": "feature"},
        },
    ],
    "learn_processors": [
        {"class": "DropnaLabel"},
        {
            "class": "CSRankNorm",
            "kwargs": {"fields_group": "label"},
        },
    ],
    "label": ["Ref($close, -2) / Ref($close, -1) - 1"],
}

# Rebinds `task`; the "model" entry comes first, then "dataset".
task = {}
task["model"] = {
    "class": "TransformerModel",
    "module_path": "qlib.contrib.model.pytorch_transformer",
    "kwargs": {"d_feat": 6, "seed": 0},
}
task["dataset"] = {
    "class": "DatasetH",
    "module_path": "qlib.data.dataset",
    "kwargs": {
        "handler": {
            "class": "Alpha360",
            "module_path": "qlib.contrib.data.handler",
            "kwargs": data_handler_config,
        },
        "segments": {
            "train": (start_time, "2014-12-31"),
            "valid": ("2015-01-01", "2016-12-31"),
            "test": ("2017-01-01", week_time),
        },
    },
}

# model initialization: this rebinds `model` and `dataset` to fresh instances
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])
[18754:MainThread](2022-10-31 14:19:16,593) INFO - qlib.TransformerModel - [pytorch_transformer.py:65] - Naive Transformer: batch_size : 2048 device : cuda:0 [18754:MainThread](2022-10-31 14:19:43,619) INFO - qlib.timer - [log.py:117] - Time cost: 27.008s | Loading data Done /home/huzb/anaconda3/envs/py38/lib/python3.8/site-packages/numpy/lib/nanfunctions.py:1095: RuntimeWarning: All-NaN slice encountered result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input) [18754:MainThread](2022-10-31 14:21:48,515) INFO - qlib.timer - [log.py:117] - Time cost: 123.168s | RobustZScoreNorm Done [18754:MainThread](2022-10-31 14:21:49,457) INFO - qlib.timer - [log.py:117] - Time cost: 0.939s | Fillna Done [18754:MainThread](2022-10-31 14:21:50,748) INFO - qlib.timer - [log.py:117] - Time cost: 0.699s | DropnaLabel Done /data/students/huzb/qlib/qlib/data/dataset/processor.py:352: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df[cols] = t [18754:MainThread](2022-10-31 14:21:51,594) INFO - qlib.timer - [log.py:117] - Time cost: 0.845s | CSRankNorm Done [18754:MainThread](2022-10-31 14:21:51,596) INFO - qlib.timer - [log.py:117] - Time cost: 127.975s | fit & process data Done [18754:MainThread](2022-10-31 14:21:51,597) INFO - qlib.timer - [log.py:117] - Time cost: 154.986s | Init data Done
# start exp to train model
# `experiment_id` is only referenced by the disabled R.start variant, which
# passed both experiment_name=experiment_name and experiment_id=experiment_id;
# the active call below selects the experiment by name alone.
experiment_id = 'cn_backtest'
with R.start(experiment_name=experiment_name):
    # Log the flattened task config, train, and store the fitted model.
    R.log_params(**flatten_dict(task))
    model.fit(dataset)
    R.save_objects(trained_model=model)

    # `rid` now refers to this run's recorder; the backtest cell uses it to
    # reload "trained_model".
    recorder = R.get_recorder()
    rid = recorder.id

    # prediction
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()
[18754:MainThread](2022-10-31 14:21:56,969) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7fc8cf8373a0> [18754:MainThread](2022-10-31 14:21:56,973) INFO - qlib.workflow - [exp.py:257] - Experiment 1 starts running ... [18754:MainThread](2022-10-31 14:21:56,986) INFO - qlib.workflow - [recorder.py:295] - Recorder 197cf96e0bff47a98c89b072335417ad starts running under Experiment 1 ... [18754:MainThread](2022-10-31 14:22:00,824) INFO - qlib.TransformerModel - [pytorch_transformer.py:191] - training... [18754:MainThread](2022-10-31 14:22:00,826) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch0: [18754:MainThread](2022-10-31 14:22:00,827) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:22:21,847) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:22:33,445) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993972, valid -0.994532 [18754:MainThread](2022-10-31 14:22:33,454) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch1: [18754:MainThread](2022-10-31 14:22:33,455) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:22:54,683) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:23:06,367) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993800, valid -0.995058 [18754:MainThread](2022-10-31 14:23:06,370) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch2: [18754:MainThread](2022-10-31 14:23:06,372) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:23:27,814) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:23:39,564) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993447, valid -0.995081 [18754:MainThread](2022-10-31 14:23:39,568) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch3: [18754:MainThread](2022-10-31 14:23:39,569) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:24:01,055) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:24:12,899) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.994193, valid -0.996027 [18754:MainThread](2022-10-31 14:24:12,902) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch4: [18754:MainThread](2022-10-31 14:24:12,904) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:24:34,380) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:24:46,138) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993148, valid -0.993657 [18754:MainThread](2022-10-31 14:24:46,147) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch5: [18754:MainThread](2022-10-31 14:24:46,148) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:25:07,654) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:25:19,558) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993291, valid -0.995148 [18754:MainThread](2022-10-31 14:25:19,562) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch6: [18754:MainThread](2022-10-31 14:25:19,563) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:25:41,084) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:25:52,930) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993307, valid -0.995193 [18754:MainThread](2022-10-31 14:25:52,932) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch7: [18754:MainThread](2022-10-31 14:25:52,933) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:26:14,714) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:26:38,861) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993478, valid -0.994461 [18754:MainThread](2022-10-31 14:26:38,864) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch8: [18754:MainThread](2022-10-31 14:26:38,865) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:27:04,916) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... [18754:MainThread](2022-10-31 14:27:16,756) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.993732, valid -0.994290 [18754:MainThread](2022-10-31 14:27:16,759) INFO - qlib.TransformerModel - [pytorch_transformer.py:195] - Epoch9: [18754:MainThread](2022-10-31 14:27:16,761) INFO - qlib.TransformerModel - [pytorch_transformer.py:196] - training... [18754:MainThread](2022-10-31 14:27:38,309) INFO - qlib.TransformerModel - [pytorch_transformer.py:198] - evaluating... 
[18754:MainThread](2022-10-31 14:27:50,157) INFO - qlib.TransformerModel - [pytorch_transformer.py:201] - train -0.994229, valid -0.996022 [18754:MainThread](2022-10-31 14:27:50,160) INFO - qlib.TransformerModel - [pytorch_transformer.py:213] - early stop [18754:MainThread](2022-10-31 14:27:50,161) INFO - qlib.TransformerModel - [pytorch_transformer.py:216] - best score: -0.993657 @ 4 [18754:MainThread](2022-10-31 14:27:55,400) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 1
'The following are prediction results of the TransformerModel model.'
score
datetime instrument
2017-01-03 SH600000 0.029039
SH600008 0.014598
SH600009 0.046318
SH600010 0.037156
SH600015 -0.001555
[18754:MainThread](2022-10-31 14:27:56,544) INFO - qlib.timer - [log.py:117] - Time cost: 0.000s | waiting `async_log` Done
# Notebook display cell: show the scores of the most recently trained model;
# the return value is not stored here.
model.predict(dataset)
datetime instrument
2017-01-03 SH600000 0.029039
SH600008 0.014598
SH600009 0.046318
SH600010 0.037156
SH600015 -0.001555
...
2020-07-22 SZ300413 -0.102836
SZ300433 -0.064527
SZ300498 0.050756
SZ300601 -0.184252
SZ300628 -0.031806
Length: 259200, dtype: float32
# Notebook display cell: show the scores saved earlier in `week_score` for
# comparison with the prediction displayed just above.
week_score
datetime instrument
2017-01-03 SH600000 0.057386
SH600008 0.050349
SH600009 0.097617
SH600010 0.047750
SH600015 0.020047
...
2020-07-22 SZ300413 -0.045579
SZ300433 -0.094042
SZ300498 0.041083
SZ300601 0.077929
SZ300628 0.046742
Length: 259200, dtype: float32
###################################
# prediction, backtest & analysis
###################################
# Built key by key in the original order: executor, strategy, backtest.
port_analysis_config = {}
port_analysis_config["executor"] = {
    "class": "SimulatorExecutor",
    "module_path": "qlib.backtest.executor",
    "kwargs": {"time_per_step": "day", "generate_portfolio_metrics": True},
}
# The strategy receives the in-memory `model`/`dataset` as bound at this point
# (i.e. before the model is reloaded from the recorder below) together with the
# previously saved `week_score` series.
port_analysis_config["strategy"] = {
    "class": "WeekTopkDropoutStrategy",
    "module_path": "qlib.contrib.strategy.signal_strategy",
    "kwargs": {
        "model": model,
        "dataset": dataset,
        "topk": 50,
        "n_drop": 5,
        "week_score": week_score,
    },
}
# The backtest window has the same bounds as the dataset's "test" segment.
port_analysis_config["backtest"] = {
    "start_time": "2017-01-01",
    "end_time": week_time,
    "account": 100000000,
    "benchmark": benchmark,
    "exchange_kwargs": {
        "freq": "day",
        "limit_threshold": 0.095,
        "deal_price": "close",
        "open_cost": 0.0005,
        "close_cost": 0.0015,
        "min_cost": 5,
    },
}

# backtest and analysis
with R.start(experiment_name="backtest_analysis"):
    # Reload the model stored by the training run identified by `rid`.
    model = R.get_recorder(recorder_id=rid, experiment_name="online_srv").load_object("trained_model")

    # prediction, recorded under this experiment's own active recorder
    recorder = R.get_recorder()
    ba_rid = recorder.id
    sr = SignalRecord(model, dataset, recorder)
    sr.generate()

    # backtest & analysis
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()
[18754:MainThread](2022-10-31 14:28:02,006) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7fc8cf81af10> [18754:MainThread](2022-10-31 14:28:02,011) INFO - qlib.workflow - [exp.py:257] - Experiment 2 starts running ... [18754:MainThread](2022-10-31 14:28:02,020) INFO - qlib.workflow - [recorder.py:295] - Recorder 7522e4b0ba2842a2a8e5b8a8af3c1335 starts running under Experiment 2 ... [18754:MainThread](2022-10-31 14:28:07,817) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 2 [18754:MainThread](2022-10-31 14:28:07,926) INFO - qlib.backtest caller - [__init__.py:94] - Create new exchange
'The following are prediction results of the TransformerModel model.'
score
datetime instrument
2017-01-03 SH600000 0.029039
SH600008 0.014598
SH600009 0.046318
SH600010 0.037156
SH600015 -0.001555
[18754:MainThread](2022-10-31 14:28:18,233) WARNING - qlib.online operator - [exchange.py:216] - factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.
[18754:MainThread](2022-10-31 14:28:18,234) WARNING - qlib.online operator - [exchange.py:218] - trade unit 100 is not supported in adjusted_price mode.
/data/students/huzb/qlib/qlib/contrib/strategy/signal_strategy.py:98: DeprecationWarning: `model` `dataset` is deprecated; use `signal`.
warnings.warn("`model` `dataset` is deprecated; use `signal`.", DeprecationWarning)
[18754:MainThread](2022-10-31 14:28:27,194) WARNING - qlib.data - [data.py:662] - load calendar error: freq=day, future=True; return current calendar!
[18754:MainThread](2022-10-31 14:28:27,195) WARNING - qlib.data - [data.py:665] - You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md
[18754:MainThread](2022-10-31 14:28:27,225) WARNING - qlib.BaseExecutor - [executor.py:121] - `common_infra` is not set for <qlib.backtest.executor.SimulatorExecutor object at 0x7fc6f53c79d0>
backtest loop: 0%| | 0/864 [00:00<?, ?it/s]
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) /data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) /data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) [18754:MainThread](2022-10-31 14:28:44,296) INFO - qlib.workflow - [record_temp.py:500] - Portfolio analysis record 'port_analysis_1day.pkl' has been saved as the artifact of the Experiment 2 [18754:MainThread](2022-10-31 14:28:44,308) INFO - qlib.workflow - [record_temp.py:525] - Indicator analysis record 'indicator_analysis_1day.pkl' has been saved as the artifact of the Experiment 2 [18754:MainThread](2022-10-31 14:28:44,351) INFO - qlib.timer - [log.py:117] - Time cost: 0.015s | waiting `async_log` Done
'The following are analysis results of benchmark return(1day).'
risk
mean 0.000484
std 0.012217
annualized_return 0.115237
information_ratio 0.611431
max_drawdown -0.370479
'The following are analysis results of the excess return without cost(1day).'
risk
mean 0.000052
std 0.004551
annualized_return 0.012397
information_ratio 0.176583
max_drawdown -0.125891
'The following are analysis results of the excess return with cost(1day).'
risk
mean -0.000115
std 0.004549
annualized_return -0.027421
information_ratio -0.390694
max_drawdown -0.154673
'The following are analysis results of indicators(1day).'
value
ffr 1.0
pa 0.0
pos 0.0
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
# Re-open the backtest run by its id and pull back the artifacts it stored.
# The run identified by `ba_rid` is looked up under "backtest_analysis" instead of
# "online_srv": the logs above report it under Experiment 2, while the "online_srv"
# lookup printed experiment_id '1'.
# NOTE(review): assumes `ba_rid` was produced by this notebook's backtest cell, which
# opens its run with R.start(experiment_name="backtest_analysis") - confirm.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
print(recorder)

# Prediction scores saved as 'pred.pkl', plus the 'datetime' level of their index.
pred_df = recorder.load_object("pred.pkl")
pred_df_dates = pred_df.index.get_level_values(level='datetime')

# Artifacts stored under portfolio_analysis/ for the 1-day frequency:
# the per-day report, the positions, and the risk analysis summary.
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")
{'class': 'Recorder', 'id': '7522e4b0ba2842a2a8e5b8a8af3c1335', 'name': 'mlflow_recorder', 'experiment_id': '1', 'start_time': '2022-10-31 14:28:02', 'end_time': '2022-10-31 14:28:44', 'status': 'FINISHED'}
# Display the per-day backtest report loaded from the recorder.
report_normal_df
| datetime | account | return | total_turnover | turnover | total_cost | cost | value | cash | bench |
|---|---|---|---|---|---|---|---|---|---|
| 2017-01-03 | 1.000000e+08 | 0.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 1.000000e+08 | 0.009713 |
| 2017-01-04 | 9.995345e+07 | 0.000000 | 9.310000e+07 | 0.931000 | 4.655000e+04 | 0.000465 | 9.310000e+07 | 6.853450e+06 | 0.007803 |
| 2017-01-05 | 1.001064e+08 | 0.001737 | 1.154931e+08 | 0.224036 | 6.722309e+04 | 0.000207 | 9.671376e+07 | 3.392656e+06 | -0.000154 |
| 2017-01-06 | 9.938115e+07 | -0.007048 | 1.355414e+08 | 0.200270 | 8.694432e+04 | 0.000197 | 9.666233e+07 | 2.718824e+06 | -0.005974 |
| 2017-01-09 | 9.968660e+07 | 0.003269 | 1.550271e+08 | 0.196070 | 1.063679e+05 | 0.000195 | 9.711128e+07 | 2.575326e+06 | 0.004848 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-07-16 | 1.238827e+08 | -0.037897 | 1.508172e+10 | 0.217892 | 1.503960e+07 | 0.000218 | 1.231524e+08 | 7.303121e+05 | -0.048102 |
| 2020-07-17 | 1.246799e+08 | 0.006637 | 1.510672e+10 | 0.201846 | 1.506458e+07 | 0.000202 | 1.240268e+08 | 6.531417e+05 | 0.006299 |
| 2020-07-20 | 1.288573e+08 | 0.033680 | 1.512745e+10 | 0.166272 | 1.508645e+07 | 0.000175 | 1.259483e+08 | 2.909028e+06 | 0.029837 |
| 2020-07-21 | 1.293939e+08 | 0.004391 | 1.515771e+10 | 0.234783 | 1.511569e+07 | 0.000227 | 1.285522e+08 | 8.417050e+05 | 0.002295 |
| 2020-07-22 | 1.300935e+08 | 0.005641 | 1.518805e+10 | 0.234480 | 1.514602e+07 | 0.000234 | 1.293021e+08 | 7.913636e+05 | 0.004990 |
864 rows × 9 columns
# Display the risk analysis summary (excess return with and without cost).
analysis_df
| | | risk |
|---|---|---|
| excess_return_without_cost | mean | 0.000052 |
| | std | 0.004551 |
| | annualized_return | 0.012397 |
| | information_ratio | 0.176583 |
| | max_drawdown | -0.125891 |
| excess_return_with_cost | mean | -0.000115 |
| | std | 0.004549 |
| | annualized_return | -0.027421 |
| | information_ratio | -0.390694 |
| | max_drawdown | -0.154673 |
# Pass the per-day backtest report to qlib's report_graph helper.
# NOTE(review): presumably renders the charts inline in the notebook - confirm.
analysis_position.report_graph(report_normal_df)
###################################
# prediction, backtest & analysis
###################################
# Configuration handed to PortAnaRecord: which executor simulates the trades, which
# strategy turns prediction scores into orders, and the backtest window / exchange.
# NOTE(review): `model` and `dataset` are captured when this dict is built, so
# reassigning `model` afterwards does not change what the strategy receives.
port_analysis_config = {
    "executor": {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
        },
    },
    "strategy": {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            # The separate `model` / `dataset` kwargs are deprecated
            # ("`model` `dataset` is deprecated; use `signal`."), so the pair is
            # passed through `signal` instead.
            "signal": (model, dataset),
            "topk": 50,
            "n_drop": 5,
        },
    },
    "backtest": {
        "start_time": "2017-01-01",
        "end_time": week_time,
        "account": 100000000,
        "benchmark": benchmark,
        "exchange_kwargs": {
            "freq": "day",
            "limit_threshold": 0.095,
            "deal_price": "close",
            "open_cost": 0.0005,
            "close_cost": 0.0015,
            "min_cost": 5,
        },
    },
}
# Run prediction, backtest and analysis inside a new "backtest_analysis" run.
with R.start(experiment_name="backtest_analysis"):
    # Fetch the trained model stored by recorder `rid` of the "online_srv" experiment.
    model = R.get_recorder(
        recorder_id=rid, experiment_name="online_srv"
    ).load_object("trained_model")

    # From here on work with the recorder of the run just started; its id is kept
    # in `ba_rid` so the artifacts can be reloaded afterwards.
    recorder = R.get_recorder()
    ba_rid = recorder.id

    # Prediction: saves the signal record 'pred.pkl' on the recorder.
    sr = SignalRecord(model=model, dataset=dataset, recorder=recorder)
    sr.generate()

    # Backtest and analysis at "day" frequency, driven by port_analysis_config.
    par = PortAnaRecord(recorder, port_analysis_config, "day")
    par.generate()
[6701:MainThread](2022-10-26 17:06:54,245) INFO - qlib.workflow - [expm.py:315] - <mlflow.tracking.client.MlflowClient object at 0x7f88473e7970> [6701:MainThread](2022-10-26 17:06:54,248) INFO - qlib.workflow - [exp.py:257] - Experiment 2 starts running ... [6701:MainThread](2022-10-26 17:06:54,269) INFO - qlib.workflow - [recorder.py:295] - Recorder b1b48a22537b429599d9a77ef49b908c starts running under Experiment 2 ... [6701:MainThread](2022-10-26 17:07:00,128) INFO - qlib.workflow - [record_temp.py:195] - Signal record 'pred.pkl' has been saved as the artifact of the Experiment 2 [6701:MainThread](2022-10-26 17:07:00,219) INFO - qlib.backtest caller - [__init__.py:94] - Create new exchange
'The following are prediction results of the TransformerModel model.'
score
datetime instrument
2017-01-03 SH600000 0.057386
SH600008 0.050349
SH600009 0.097617
SH600010 0.047750
SH600015 0.020047
[6701:MainThread](2022-10-26 17:07:08,183) WARNING - qlib.online operator - [exchange.py:216] - factor.day.bin file not exists or factor contains `nan`. Order using adjusted_price.
[6701:MainThread](2022-10-26 17:07:08,185) WARNING - qlib.online operator - [exchange.py:218] - trade unit 100 is not supported in adjusted_price mode.
/data/students/huzb/qlib/qlib/contrib/strategy/signal_strategy.py:98: DeprecationWarning: `model` `dataset` is deprecated; use `signal`.
warnings.warn("`model` `dataset` is deprecated; use `signal`.", DeprecationWarning)
[6701:MainThread](2022-10-26 17:07:16,966) WARNING - qlib.data - [data.py:662] - load calendar error: freq=day, future=True; return current calendar!
[6701:MainThread](2022-10-26 17:07:16,968) WARNING - qlib.data - [data.py:665] - You can get future calendar by referring to the following document: https://github.com/microsoft/qlib/blob/main/scripts/data_collector/contrib/README.md
[6701:MainThread](2022-10-26 17:07:16,988) WARNING - qlib.BaseExecutor - [executor.py:121] - `common_infra` is not set for <qlib.backtest.executor.SimulatorExecutor object at 0x7f85f5643a90>
backtest loop: 0%| | 0/864 [00:00<?, ?it/s]
/data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) /data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) /data/students/huzb/qlib/qlib/utils/index_data.py:482: RuntimeWarning: Mean of empty slice return np.nanmean(self.data) [6701:MainThread](2022-10-26 17:07:34,245) INFO - qlib.workflow - [record_temp.py:500] - Portfolio analysis record 'port_analysis_1day.pkl' has been saved as the artifact of the Experiment 2 [6701:MainThread](2022-10-26 17:07:34,257) INFO - qlib.workflow - [record_temp.py:525] - Indicator analysis record 'indicator_analysis_1day.pkl' has been saved as the artifact of the Experiment 2 [6701:MainThread](2022-10-26 17:07:34,303) INFO - qlib.timer - [log.py:117] - Time cost: 0.017s | waiting `async_log` Done
'The following are analysis results of benchmark return(1day).'
risk
mean 0.000484
std 0.012217
annualized_return 0.115237
information_ratio 0.611431
max_drawdown -0.370479
'The following are analysis results of the excess return without cost(1day).'
risk
mean 0.000096
std 0.004464
annualized_return 0.022863
information_ratio 0.331977
max_drawdown -0.102051
'The following are analysis results of the excess return with cost(1day).'
risk
mean -0.000057
std 0.004462
annualized_return -0.013459
information_ratio -0.195507
max_drawdown -0.132749
'The following are analysis results of indicators(1day).'
value
ffr 1.0
pa 0.0
pos 0.0
from qlib.contrib.report import analysis_model, analysis_position
from qlib.data import D
# Re-open the backtest run by its id and pull back the artifacts it stored.
# `ba_rid` is taken from the recorder started by
# R.start(experiment_name="backtest_analysis"), so the run is looked up under that
# experiment instead of "online_srv". The "online_srv" lookup printed
# experiment_id '1' while the logs report the run under Experiment 2.
recorder = R.get_recorder(recorder_id=ba_rid, experiment_name="backtest_analysis")
print(recorder)

# Prediction scores saved as 'pred.pkl', plus the 'datetime' level of their index.
pred_df = recorder.load_object("pred.pkl")
pred_df_dates = pred_df.index.get_level_values(level='datetime')

# Artifacts stored under portfolio_analysis/ for the 1-day frequency:
# the per-day report, the positions, and the risk analysis summary.
report_normal_df = recorder.load_object("portfolio_analysis/report_normal_1day.pkl")
positions = recorder.load_object("portfolio_analysis/positions_normal_1day.pkl")
analysis_df = recorder.load_object("portfolio_analysis/port_analysis_1day.pkl")
{'class': 'Recorder', 'id': 'b1b48a22537b429599d9a77ef49b908c', 'name': 'mlflow_recorder', 'experiment_id': '1', 'start_time': '2022-10-26 17:06:54', 'end_time': '2022-10-26 17:07:34', 'status': 'FINISHED'}
# Display the per-day backtest report loaded from the recorder.
report_normal_df
| datetime | account | return | total_turnover | turnover | total_cost | cost | value | cash | bench |
|---|---|---|---|---|---|---|---|---|---|
| 2017-01-03 | 1.000000e+08 | 0.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | 1.000000e+08 | 0.009713 |
| 2017-01-04 | 9.995345e+07 | 0.000000 | 9.310000e+07 | 0.931000 | 4.655000e+04 | 0.000465 | 9.310000e+07 | 6.853450e+06 | 0.007803 |
| 2017-01-05 | 1.000184e+08 | 0.000857 | 1.155191e+08 | 0.224295 | 6.725055e+04 | 0.000207 | 9.662274e+07 | 3.395665e+06 | -0.000154 |
| 2017-01-06 | 9.934240e+07 | -0.006573 | 1.333950e+08 | 0.178726 | 8.582183e+04 | 0.000186 | 9.457452e+07 | 4.767881e+06 | -0.005974 |
| 2017-01-09 | 9.957669e+07 | 0.002568 | 1.541864e+08 | 0.209290 | 1.066828e+05 | 0.000210 | 9.469054e+07 | 4.886157e+06 | 0.004848 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-07-16 | 1.291435e+08 | -0.048746 | 1.404609e+10 | 0.166102 | 1.400338e+07 | 0.000166 | 1.285583e+08 | 5.851626e+05 | -0.048102 |
| 2020-07-17 | 1.306797e+08 | 0.012098 | 1.407219e+10 | 0.202101 | 1.402954e+07 | 0.000203 | 1.300024e+08 | 6.772376e+05 | 0.006299 |
| 2020-07-20 | 1.344544e+08 | 0.029044 | 1.409303e+10 | 0.159532 | 1.405033e+07 | 0.000159 | 1.339081e+08 | 5.462803e+05 | 0.029837 |
| 2020-07-21 | 1.359566e+08 | 0.011348 | 1.411657e+10 | 0.175068 | 1.407391e+07 | 0.000175 | 1.353454e+08 | 6.112465e+05 | 0.002295 |
| 2020-07-22 | 1.367051e+08 | 0.005693 | 1.414205e+10 | 0.187413 | 1.409943e+07 | 0.000188 | 1.360429e+08 | 6.621639e+05 | 0.004990 |
864 rows × 9 columns
# Pass the per-day backtest report to qlib's report_graph helper.
# NOTE(review): presumably renders the charts inline in the notebook - confirm.
analysis_position.report_graph(report_normal_df)
# Display the risk analysis summary (excess return with and without cost).
analysis_df
| | | risk |
|---|---|---|
| excess_return_without_cost | mean | 0.000096 |
| | std | 0.004464 |
| | annualized_return | 0.022863 |
| | information_ratio | 0.331977 |
| | max_drawdown | -0.102051 |
| excess_return_with_cost | mean | -0.000057 |
| | std | 0.004462 |
| | annualized_return | -0.013459 |
| | information_ratio | -0.195507 |
| | max_drawdown | -0.132749 |